* Load the cleaned master data and reduce it to one row per
* (lopnr, persid, year, firmid); work becomes the count of non-missing values.
use lopnr persid year firmid work using "$datapath/A2_cleaned_masterdata.dta", clear
gcollapse (count) work, by(lopnr persid year firmid)

* Attach the work-category files (presumably the source of sector_firm and
* sector used further down -- verify against the using files).
* keep(master match) discards rows that exist only in the using file and
* nogenerate suppresses the _merge variable.
merge m:1 persid firmid year using "$datapath/workcat_firmspec.dta", keep(master match) nogenerate
merge m:1 persid year using "$datapath/workcat_byyear.dta", keep(master match) nogenerate




*Generate: sector_firm carried forward from earlier rows of the same person
*   sector_firm3 = sector_firm, else the value 1, 2 or 3 rows back (nearest first)
*   sector_firm5 = sector_firm3, else the value 4 or 5 rows back
*NOTE: lags are counted in rows within lopnr, not in calendar years. They equal
*      "years back" only if every lopnr has exactly one row per consecutive
*      year -- TODO confirm against the panel structure.
*NOTE: assumes sector_firm values fit in a byte -- TODO confirm.
*firmid and persid break ties within year: Stata orders tied observations
*arbitrarily, so sorting on year alone makes [_n-k] irreproducible whenever a
*person has several rows in the same year.
sort lopnr year firmid persid

*Impute 3 years back
gen byte sector_firm3 = sector_firm
forvalues k = 1/3 {
	by lopnr: replace sector_firm3 = sector_firm[_n-`k'] if sector_firm3==.
}

*Impute 5 years back
gen byte sector_firm5 = sector_firm3
forvalues k = 4/5 {
	by lopnr: replace sector_firm5 = sector_firm[_n-`k'] if sector_firm5==.
}

*Generate: sector carried forward from earlier rows of the same person
*   sector3 = sector, else the value 1, 2 or 3 rows back (nearest first)
*   sector5 = sector3, else the value 4 or 5 rows back
*NOTE: lags are counted in rows within lopnr, not in calendar years. They equal
*      "years back" only if every lopnr has exactly one row per consecutive
*      year -- TODO confirm against the panel structure.
*NOTE: assumes sector values fit in a byte -- TODO confirm.
*firmid and persid break ties within year: Stata orders tied observations
*arbitrarily, so sorting on year alone makes [_n-k] irreproducible whenever a
*person has several rows in the same year.
sort lopnr year firmid persid

*Impute 3 years back
gen byte sector3 = sector
forvalues k = 1/3 {
	by lopnr: replace sector3 = sector[_n-`k'] if sector3==.
}

*Impute 5 years back
gen byte sector5 = sector3
forvalues k = 4/5 {
	by lopnr: replace sector5 = sector[_n-`k'] if sector5==.
}
*Build one work category by taking the first non-missing value in this order:
*sector_firm, sector_firm3, sector_firm5, sector, sector3, sector5
gen workcat_imputed = sector_firm
foreach src of varlist sector_firm3 sector_firm5 sector sector3 sector5 {
	replace workcat_imputed = `src' if workcat_imputed==.
}

*White-collar indicator: 1 when the imputed category equals 5, 0 for any other
*non-missing category, and missing when no category is available
gen WC = (workcat_imputed==5) if !missing(workcat_imputed)






*Retain the identifiers and work-category variables only, then write the file
local keepvars lopnr persid year WC sector sector_firm workcat_imputed
keep `keepvars'
save "$datapath/A3_WCindicator.dta", replace

/*

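// Impute WC indicator from SSYK codes for sector 2-4
*The idea is that if at least 80 percent of the people in an SSYK code work in
*sector 5, then the whole SSYK code is flagged as white collar (WCssyk==1) and
*people in sectors 2-4 with such a code are re-coded as white collar (WC2).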
preserve
gcollapse (max) sector* ssyk* , by(lopnr year)

gen ssyk1 = floor(ssyk3/100)
tab ssyk3 ssyk1 if inrange(ssyk3,100,500)

gen ssyk2 = floor(ssyk3/10)

gen WC = sector_firm==5 | sector==5
replace WC=. if sector_firm==. & sector==.
tab ssyk1

/*
preserve
	drop if ssyk3<100
	gcollapse (mean) WC, by(ssyk2 )
	twoway (bar WC ssyk2), name(a,replace) xline(50)
	twoway (bar WC ssyk2 if ssyk<50), name(b,replace) xline(50)
restore	
*/

		drop if ssyk3<100
		keep if ssykstatus==1
		gcollapse (mean) WC, by(ssyk3 )
		twoway (bar WC ssyk3), name(a,replace)
		twoway (bar WC ssyk3 if ssyk<300), name(b,replace) 
		gen WCssyk = WC>=.8
		tempfile temp
		save `temp'

restore

merge m:1 ssyk3 using `temp'
drop _merge

tab WCssyk sector_firm, col
tab WCssyk workcat_imputed,col

gen WC2		= WC
replace WC2	= WCssyk if inrange(sector,2,4) & WC2==0 & WCssyk==1
tab WC2 sector_firm
tab WC2 workcat_imputed

	*/
	
